linux: More save/restore fixes. Fix deadlock of cpu_hotplug_lock vs
author kfraser@localhost.localdomain <kfraser@localhost.localdomain>
Wed, 28 Feb 2007 17:55:19 +0000 (17:55 +0000)
committer kfraser@localhost.localdomain <kfraser@localhost.localdomain>
Wed, 28 Feb 2007 17:55:19 +0000 (17:55 +0000)
workqueue_mutex. This is a new deadlock since the workqueue_mutex is
acquired in the workqueue_cpu_callback() function across
CPU_UP_PREPARE->CPU_ONLINE.

The fix is for us not to rudely grab the cpu_hotplug_lock during
save/restore -- it's really not necessary.

This patch is applicable to any of our 2.6 kernels, but is absolutely
required from 2.6.18 onwards.

Signed-off-by: Keir Fraser <keir@xensource.com>
linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c
linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c
linux-2.6-xen-sparse/drivers/xen/core/smpboot.c
linux-2.6-xen-sparse/include/xen/cpu_hotplug.h

index b272647e5dff58600a23376bd3a414207713b194..5602088e3ee1a5808c245bc65f14fa65aa10f4c4 100644 (file)
@@ -121,29 +121,19 @@ arch_initcall(setup_vcpu_hotplug_event);
 
 int smp_suspend(void)
 {
-       int i, err;
-
-       lock_cpu_hotplug();
-
-       /*
-        * Take all other CPUs offline. We hold the hotplug mutex to
-        * avoid other processes bringing up CPUs under our feet.
-        */
-       while (num_online_cpus() > 1) {
-               unlock_cpu_hotplug();
-               for_each_online_cpu(i) {
-                       if (i == 0)
-                               continue;
-                       err = cpu_down(i);
-                       if (err) {
-                               printk(KERN_CRIT "Failed to take all CPUs "
-                                      "down: %d.\n", err);
-                               for_each_possible_cpu(i)
-                                       vcpu_hotplug(i);
-                               return err;
-                       }
+       int cpu, err;
+
+       for_each_online_cpu(cpu) {
+               if (cpu == 0)
+                       continue;
+               err = cpu_down(cpu);
+               if (err) {
+                       printk(KERN_CRIT "Failed to take all CPUs "
+                              "down: %d.\n", err);
+                       for_each_possible_cpu(cpu)
+                               vcpu_hotplug(cpu);
+                       return err;
                }
-               lock_cpu_hotplug();
        }
 
        return 0;
@@ -153,11 +143,6 @@ void smp_resume(void)
 {
        int cpu;
 
-       for_each_possible_cpu(cpu)
-               cpu_initialize_context(cpu);
-
-       unlock_cpu_hotplug();
-
        for_each_possible_cpu(cpu)
                vcpu_hotplug(cpu);
 }
index 07de13c835500e7fc8906599cd1c30a5555549fd..95337b80777293aeece696e6fd95b5281f0a292f 100644 (file)
@@ -97,6 +97,9 @@ static void post_suspend(int suspend_cancelled)
                        pfn_to_mfn(xen_start_info->store_mfn);
                xen_start_info->console.domU.mfn =
                        pfn_to_mfn(xen_start_info->console.domU.mfn);
+       } else {
+               extern cpumask_t cpu_initialized_map;
+               cpu_initialized_map = cpumask_of_cpu(0);
        }
        
        set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
@@ -147,13 +150,20 @@ int __xen_suspend(void)
        }
 #endif
 
-       err = smp_suspend();
-       if (err)
-               return err;
+       for (;;) {
+               err = smp_suspend();
+               if (err)
+                       return err;
+
+               xenbus_suspend();
+               preempt_disable();
 
-       xenbus_suspend();
+               if (num_online_cpus() == 1)
+                       break;
 
-       preempt_disable();
+               preempt_enable();
+               xenbus_suspend_cancel();
+       }
 
        mm_pin_all();
        local_irq_disable();
index 31ab514b4e1e585ed61e07b3d2da3769dde6779c..fd8b1701863bba46238e3f81a7c84abe0560b65f 100644 (file)
@@ -47,7 +47,7 @@ cpumask_t cpu_online_map;
 EXPORT_SYMBOL(cpu_online_map);
 cpumask_t cpu_possible_map;
 EXPORT_SYMBOL(cpu_possible_map);
-static cpumask_t cpu_initialized_map;
+cpumask_t cpu_initialized_map;
 
 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_data);
@@ -185,7 +185,7 @@ static void cpu_bringup_and_idle(void)
        cpu_idle();
 }
 
-void cpu_initialize_context(unsigned int cpu)
+static void cpu_initialize_context(unsigned int cpu)
 {
        vcpu_guest_context_t ctxt;
        struct task_struct *idle = idle_task(cpu);
@@ -195,7 +195,7 @@ void cpu_initialize_context(unsigned int cpu)
        struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
 #endif
 
-       if (cpu == 0)
+       if (cpu_test_and_set(cpu, cpu_initialized_map))
                return;
 
        memset(&ctxt, 0, sizeof(ctxt));
@@ -417,10 +417,7 @@ int __devinit __cpu_up(unsigned int cpu)
        if (rc)
                return rc;
 
-       if (!cpu_isset(cpu, cpu_initialized_map)) {
-               cpu_set(cpu, cpu_initialized_map);
-               cpu_initialize_context(cpu);
-       }
+       cpu_initialize_context(cpu);
 
        if (num_online_cpus() == 1)
                alternatives_smp_switch(1);
index 10ff259e57efc0868633b2ca14466952ba871165..234503f1ea1a1148b52620f22fb3e3d844fa3106 100644 (file)
@@ -6,12 +6,6 @@
 
 #if defined(CONFIG_HOTPLUG_CPU)
 
-#if defined(CONFIG_X86)
-void cpu_initialize_context(unsigned int cpu);
-#else
-#define cpu_initialize_context(cpu)    ((void)0)
-#endif
-
 int cpu_up_check(unsigned int cpu);
 void init_xenbus_allowed_cpumask(void);
 int smp_suspend(void);